slugs <- read.table( 'http://www.bio.ic.ac.uk/research/mjcraw/statcomp/data/slugsurvey.txt', header=TRUE) slug.table <- table(slugs$slugs, slugs$field) slugtable <- data.frame(table(slugs$slugs, slugs$field)) slugtable$Var1.num <- as.numeric(as.character(slugtable$Var1)) # common mean Poisson model pois1.LL <- function(p){ LL<-sum(log(dpois(slugs$slugs,lambda=p))) LL } negpois1.LL<-function(p){ -pois1.LL(p) } out.pois1 <- nlm(negpois1.LL, c(1.2)) # separate means Poisson model pois2.LL <- function(p){ mu <- p[1] + p[2]*(slugs$field=="Rookery") LL <- sum(log(dpois(slugs$slugs, lambda=mu))) LL } negpois2.LL <- function(p){ -pois2.LL(p) } out.pois2 <- nlm(negpois2.LL, c(1.2,1)) # model 1: single mean negative binomial model negNB1.LL <- function(p) { mu <- p[1] theta <- p[2] LL <- sum(log(dnbinom(slugs$slugs, mu=mu, size=theta))) -LL } out.NB1 <- nlm(negNB1.LL, c(2,1)) out.NB1 # model 2: two means negative binomial model negNB2.LL <- function(p) { mu <- p[1] + p[2]*(slugs$field=='Rookery') theta <- p[3] LL <- sum(log(dnbinom(slugs$slugs, mu=mu, size=theta))) -LL } # obtain estimates out.NB2 <- nlm(negNB2.LL, c(2,1,1)) out.NB2 # model 3: single mean, two dispersions negative binomial model negNB3.LL <- function(p) { mu <- p[1] theta <- p[2] + p[3]*(slugs$field=='Rookery') LL <- sum(log(dnbinom(slugs$slugs, mu=mu, size=theta))) -LL } # obtain estimates out.NB3 <- nlm(negNB3.LL, c(2,1,1)) out.NB3 # model 4: two means, two dispersions negative binomial model negNB4.LL <- function(p) { mu <- p[1] + p[2]*(slugs$field=='Rookery') theta <- p[3] + p[4]*(slugs$field=='Rookery') LL <- sum(log(dnbinom(slugs$slugs, mu=mu, size=theta))) -LL } # obtain estimates out.NB4 <- nlm(negNB4.LL, c(2,1,1,1)) out.NB4 # collect log-likelihoods of the various models LL <- sapply(list(out.pois1,out.pois2,out.NB1,out.NB2,out.NB3,out.NB4), function(x) -x$minimum) LL # count number of estimated parameters k <- sapply(list(out.pois1,out.pois2,out.NB1,out.NB2,out.NB3,out.NB4), function(x) length(x$estimate)) k labels<-c('Pois1','Pois2','NB1','NB2','NB3','NB4') output <- data.frame(labels,LL,k) output #### fit the models using standard R functions #### # Poisson model with identity link versus log link glm(slugs~1, data=slugs, family=poisson(link=identity)) glm(slugs~1, data=slugs, family=poisson) # Poisson models glm.pois1 <- glm(slugs~1, data=slugs, family=poisson) glm.pois2 <- glm(slugs~field, data=slugs, family=poisson) # negative binomial models with common dispersion library(MASS) glm.NB1 <- glm.nb(slugs~1, data=slugs) glm.NB2 <- glm.nb(slugs~field, data=slugs) # negative binomial models with separate dispersions library(gamlss) library(help=gamlss) ?gamlss glm.NB3 <- gamlss(slugs~1, sigma.formula=~field, data=slugs, family=NBI) glm.NB3 # log-likelihoods match -out.NB3$minimum logLik(glm.NB3) # parameter estimates out.NB3$estimate coef(glm.NB3) methods(coef) ?coef.gamlss # to extract dispersion parameters we need to use the what argument coef(glm.NB3,what='sigma') # theta for nursery slugs out.NB3$estimate 1/exp(coef(glm.NB3, what='sigma')[1]) #theta for rookery slugs 1/exp(sum(coef(glm.NB3, what='sigma'))) sum(out.NB3$estimate[2:3]) # separate dispersions and separate means glm.NB4 <- gamlss(slugs~field, sigma.formula=~field, data=slugs, family=NBI) # log-likelihoods logLik(glm.NB4) -out.NB4$minimum # collect log-likelihoods together LL.glm <- sapply(list(glm.pois1,glm.pois2,glm.NB1,glm.NB2,glm.NB3,glm.NB4), logLik) LL.glm LL # number of estimated parameters for Poisson models k.pois <- sapply(list(glm.pois1,glm.pois2),function(x) length(coef(x))) # number of estimated parameters for glm.nb models k.nb <- sapply(list(glm.NB1,glm.NB2),function(x) length(coef(x)))+1 # number of estimated parameters for gamlss models k.gamlss <- sapply(list(glm.NB3,glm.NB4), function(x) length(coef(x)) + length(coef(x,what='sigma'))) k.glm <- c(k.pois, k.nb, k.gamlss) data.frame(labels, k=k.glm, LL=LL.glm) output # LR test comparing Pois2 and Pois1: beta1 = 0 anova(glm.pois1,glm.pois2,test='Chisq') 2*(output$LL[2]-output$LL[1]) 1-pchisq(2*(output$LL[2]-output$LL[1]), output$k[2]-output$k[1]) # conclusion: reject Pois1 # compare NB2 and Pois2 # LR-test for testing a boundary condition, 1/theta = 0 output # LR statistic 2*(output$LL[4]-output$LL[2]) # critical value .5*qchisq(.95,1) # p-value .5*(1-pchisq(2*(output$LL[4]-output$LL[2]),2)) # conclusion: reject Pois2 # compare NB1 and Pois1 # LR-test for testing a boundary condition, 1/theta = 0 # LR statistic 2*(output$LL[3]-output$LL[1]) # critical value .5*qchisq(.95,1) # p-value .5*(1-pchisq(2*(output$LL[3]-output$LL[1]),2)) # conclusion: reject Pois1 # compare NB1 and NB2: beta1 = 0 anova(glm.NB1,glm.NB2,test='Chisq') 2*(output$LL[5]-output$LL[3]) 1-pchisq(2*(output$LL[5]-output$LL[3]), output$k[5]-output$k[3]) # conclusion: reject NB2 # compare NB1 and NB3: theta1 = 0 2*(output$LL[5]-output$LL[3]) 1-pchisq(2*(output$LL[5]-output$LL[3]), output$k[5]-output$k[3]) # conclusion: reject NB1 # compare NB4 and NB2: theta1=0 2*(output$LL[6]-output$LL[4]) qchisq(.95,1) 1-pchisq(2*(output$LL[6]-output$LL[4]),1) # conclusion: reject NB2 # compare NB3 and NB4: beta1 = 0 2*(output$LL[6]-output$LL[5]) 1-pchisq(2*(output$LL[6]-output$LL[5]), output$k[6]-output$k[5]) # conclusion: reject NB4 # obtain expected counts of NB3 model # mean slugtable$mu <- out.NB3$estimate[1] # theta slugtable$theta <- out.NB3$estimate[2] + out.NB3$estimate[3]*(slugtable$Var2=='Rookery') # calculate NB probabilities of each count slugtable$p <- dnbinom(slugtable$Var1.num, mu=slugtable$mu, size=slugtable$theta) sum(slugtable$p[1:11]) # add tail probability to X=10 category slugtable$p2 <- slugtable$p + pnbinom(10, mu=slugtable$mu, size=slugtable$theta, lower.tail=F)*(slugtable$Var1.num==10) slugtable # create a data frame of sample sizes data.frame(table(slugs$field)) n.data <- data.frame(table(slugs$field)) n.data names(n.data) <- c('Var2','n') n.data # add samples sizes to data set slugtable2 <- merge(slugtable,n.data) # calculate expected counts slugtable2$exp <- slugtable2$n*slugtable2$p2 slugtable2 library(lattice) xyplot(Freq~Var1.num|Var2, data=slugtable2, xlab='Count category', panel=function(x, y, subscripts) { panel.xyplot(x, y, type='h', lineend=1, col='grey', lwd=10) # NB3 counts panel.points(x, slugtable2$exp[subscripts], pch=16, cex=.6, col=1, type='o')})